Get the data
# Source pages on the NY State Department of Health site (2019 vital
# statistics): `url` points at table 24 and `url1` at table 13.
url <- "https://www.health.ny.gov/statistics/vital_statistics/2019/table24.htm"
url1 <- "https://www.health.ny.gov/statistics/vital_statistics/2019/table13.htm"

# For each page: parse the HTML, extract its tables without promoting any row
# to a header, keep the first table, and standardise the column names.
induced_abortion_finance <- url %>%
  read_html() %>%
  html_table(header = FALSE) %>%
  first() %>%
  janitor::clean_names()

live_birth_finance <- url1 %>%
  read_html() %>%
  html_table(header = FALSE) %>%
  first() %>%
  janitor::clean_names()
Clean the live birth data
# Tidy the scraped live-birth table: keep six columns, give them readable
# names, keep rows 14-70, relabel three county names, and convert the count
# columns from text to numbers.
clean_lb_finance <- live_birth_finance %>%
  # Columns are dropped by position; the six that remain are named below.
  select(-c(4, 6:8)) %>%
  # NOTE(review): Self_Pay is listed before Other_Insurance here, while the
  # induced-abortion cleaning step lists them the other way round. Confirm the
  # labels against the column order of the source table.
  purrr::set_names(
    c("borough", "total", "Medicaid", "Self_Pay", "Other_Insurance", "Not_Stated")
  ) %>%
  slice(14:70) %>%
  mutate(
    borough = str_replace(borough, "Kings", "Brooklyn"),
    # Exact match only, so other values containing "New York" are left alone.
    borough = ifelse(as.character(borough) == "New York", "Manhattan", as.character(borough)),
    borough = str_replace(borough, "Richmond", "Staten_Island"),
    # Remove every thousands separator (not just the first one) before the
    # numeric conversion; a leftover comma would turn the value into NA.
    across(
      all_of(c("total", "Medicaid", "Self_Pay", "Other_Insurance", "Not_Stated")),
      ~ as.numeric(str_replace_all(.x, ",", ""))
    )
  ) %>%
  # Lower-case / snake_case the column names for the later steps.
  janitor::clean_names()
Clean the induced abortion data
# Tidy the scraped induced-abortion table: keep six columns, give them readable
# names, keep rows 14-70, relabel three county names, and convert the count
# columns from text to numbers.
clean_ia_finance <- induced_abortion_finance %>%
  # Columns are kept by position; they are named in this order below.
  select(c(1:3, 5, 6, 8)) %>%
  purrr::set_names(
    c("borough", "total", "Medicaid", "Other_Insurance", "Self_Pay", "Not_Stated")
  ) %>%
  slice(14:70) %>%
  mutate(
    borough = str_replace(borough, "Kings", "Brooklyn"),
    # Exact match only, so other values containing "New York" are left alone.
    borough = ifelse(as.character(borough) == "New York", "Manhattan", as.character(borough)),
    borough = str_replace(borough, "Richmond", "Staten_Island"),
    # Remove every thousands separator (not just the first one) before the
    # numeric conversion; a leftover comma would turn the value into NA.
    across(
      all_of(c("total", "Medicaid", "Other_Insurance", "Self_Pay", "Not_Stated")),
      ~ as.numeric(str_replace_all(.x, ",", ""))
    )
  ) %>%
  # Lower-case / snake_case the column names for the later steps.
  janitor::clean_names()
Regions were created based on this website: https://statejobs.ny.gov/assets/help/regionMapText.cfm
Rename the counties in the live births data to their regions.
# Collapse the live-birth rows into regions: each gsub() rewrites any listed
# county name found in `borough` to its region label (applied in the order
# written), then the counts are summed within each resulting label.
rename_lb_finance <- clean_lb_finance %>%
  mutate(
    borough = gsub("Albany|Columbia|Fulton|Greene|Montgomery|Rensselaer|Saratoga|Schenectady|Schoharie|Warren|Washington", "Saratoga", borough),
    borough = gsub("Franklin|Clinton|Essex|Hamilton", "Eastern Adirondacks", borough),
    borough = gsub("Herkimer|Jefferson|Lewis|Oneida|Oswego|St Lawrence", "Western Adirondacks", borough),
    borough = gsub("Broome|Cayuga|Chenango|Cortland|Madison|Onondaga|Otsego|Tioga|Tompkins", "Central New York", borough),
    borough = gsub("Chemung|Genesee|Livingston|Monroe|Ontario|Schuyler|Seneca|Steuben|Wayne|Yates", "Finger Lakes", borough),
    borough = gsub("Allegany|Cattaraugus|Chautauqua|Erie|Niagara|Orleans|Wyoming", "Western New York", borough),
    borough = gsub("Delaware|Dutchess|Orange|Putnam|Sullivan|Ulster", "Hudson Valley", borough),
    borough = gsub("Rockland|Westchester", "Westchester/Rockland", borough),
    borough = gsub("Suffolk|Nassau", "Long Island", borough)
  ) %>%
  group_by(borough) %>%
  # One row per label; a missing count in any member row makes that sum NA.
  summarize(
    total_rate = sum(total),
    total_medicaid = sum(medicaid),
    total_other = sum(other_insurance),
    total_self = sum(self_pay),
    total_not_stated = sum(not_stated)
  )
Rename the counties in the induced abortion data to their regions.
# Collapse the induced-abortion rows into regions: each gsub() rewrites any
# listed county name found in `borough` to its region label (applied in the
# order written), then the counts are summed within each resulting label.
rename_ia_finance <- clean_ia_finance %>%
  mutate(
    borough = gsub("Albany|Columbia|Fulton|Greene|Montgomery|Rensselaer|Saratoga|Schenectady|Schoharie|Warren|Washington", "Saratoga", borough),
    borough = gsub("Franklin|Clinton|Essex|Hamilton", "Eastern Adirondacks", borough),
    borough = gsub("Herkimer|Jefferson|Lewis|Oneida|Oswego|St Lawrence", "Western Adirondacks", borough),
    borough = gsub("Broome|Cayuga|Chenango|Cortland|Madison|Onondaga|Otsego|Tioga|Tompkins", "Central New York", borough),
    borough = gsub("Chemung|Genesee|Livingston|Monroe|Ontario|Schuyler|Seneca|Steuben|Wayne|Yates", "Finger Lakes", borough),
    borough = gsub("Allegany|Cattaraugus|Chautauqua|Erie|Niagara|Orleans|Wyoming", "Western New York", borough),
    borough = gsub("Delaware|Dutchess|Orange|Putnam|Sullivan|Ulster", "Hudson Valley", borough),
    borough = gsub("Rockland|Westchester", "Westchester/Rockland", borough),
    borough = gsub("Suffolk|Nassau", "Long Island", borough)
  ) %>%
  group_by(borough) %>%
  # One row per label; a missing count in any member row makes that sum NA.
  summarize(
    total_rate = sum(total),
    total_medicaid = sum(medicaid),
    total_other = sum(other_insurance),
    total_self = sum(self_pay),
    total_not_stated = sum(not_stated)
  )
Merge the two data sets to compute rates from the count data
# Join the regional live-birth sums (left table, `_x` columns after name
# cleaning) with the regional induced-abortion sums (right table, `_y`
# columns) and express each abortion count per 1,000 of the matching
# live-birth count.
merged_data <- rename_lb_finance %>%
  full_join(rename_ia_finance, by = "borough") %>%
  # Turns the join's ".x" / ".y" suffixes into "_x" / "_y".
  janitor::clean_names() %>%
  mutate(
    medicaid = (total_medicaid_y / total_medicaid_x) * 1000,
    self_pay = (total_self_y / total_self_x) * 1000,
    other_insurance = (total_other_y / total_other_x) * 1000,
    not_stated = (total_not_stated_y / total_not_stated_x) * 1000,
    total = (total_rate_y / total_rate_x) * 1000
  ) %>%
  # Keep only the label and the computed rates.
  select(borough, medicaid, self_pay, other_insurance, not_stated, total) %>%
  # Discard any row where at least one rate is missing.
  drop_na()
Plotly bar chart: induced abortion rates by financial plan
# Bar chart of the per-1,000-live-births rates: one bar per financial plan and
# region, coloured by region. The overall `total` rate is left out.
abortion_finance_plot <- merged_data %>%
  select(-total) %>%
  # Long format: one row per (borough, finance_plan) pair.
  pivot_longer(
    cols = medicaid:not_stated,
    names_to = "finance_plan",
    values_to = "abortion"
  ) %>%
  plot_ly(
    x = ~finance_plan,
    y = ~abortion,
    color = ~borough,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    title = "Abortions in NY State by Financial Plan",
    yaxis = list(title = "Number of Induced Abortions per 1,000 Live Births")
  )

# Display the chart.
abortion_finance_plot